
# load packages 
library(ggplot2)
library(patchwork)

# font
## font <- "Gill Sans" # used in paper; but might not be installed
font <- NULL

# set seed for reproducibility
set.seed(985283)

# set parameters
s_y <- 1.87
s_psi <- 0.18
N <- seq(100, 3000, by = 100)

# a function to compute the deflation factor
compute_defl <- function(s_y, s_pis, N) {
  num <- s_psi^2
  den <- (2*s_y/sqrt(N))^2
  defl <- 1/sqrt((num/den) + 1)
  return(defl)
}

# a function to do simulations
do_sims <- function(s_y, s_psi, N) {
  # set parameters
  num_simulations <- 100  # number of simulations
  num_captured_zero <- 0  # counter for confidence intervals that contain zero
  for (i in 1:num_simulations) {
    # generate data
    group1 <- rnorm(N/2, sd = s_y) + rnorm(1, mean = 0, sd = s_psi)
    group2 <- rnorm(N/2, sd = s_y)
    
    # do t-test
    test_result <- t.test(group1, group2)
    
    # check if the confidence interval contains zero
    if (test_result$conf.int[1] < 0 && test_result$conf.int[2] > 0) {
      num_captured_zero <- num_captured_zero + 1
    }
  }
  # calculating the proportion of confidence intervals that capture zero
  prop_captured_zero <- num_captured_zero / num_simulations
  return(prop_captured_zero)
} 

# a vectorized function to do simulations
v_do_sims <- Vectorize(do_sims, vectorize.args = "N")

# compute the deflation factor
defl <- compute_defl(s_y, s_psi, N)

# compute coverage of 95% ci
cov2 <- 1 - 2*(1 - pnorm(qnorm(0.975)*defl)) # for testing; should equal cov
cov <- 1 - (pnorm(qnorm(0.975)*defl, lower.tail = FALSE) +  pnorm(qnorm(0.025)*defl, lower.tail = TRUE))

# mc coverage
mc_cov <- v_do_sims(s_y, s_psi, N)

# compute SE
se <- 2*s_y/sqrt(N)


# Figure 1 from paper
# -------------------

gg_data <- data.frame(N, se, defl, cov, mc_cov)

gg1 <- ggplot(gg_data, aes(x = N, y = defl)) + 
  geom_line() + 
  theme_bw() + 
  scale_x_continuous(labels = scales::comma) + 
  labs(x = "Sample Size",
       y = "Deflation Factor", 
       title = "A: Deflation Factor as Sample Size Varies") +
  theme(text = element_text(family = font))

gg2 <- ggplot(gg_data, aes(x = N, y = cov)) + 
  geom_line() + 
  #geom_point(aes(y = mc_cov)) + 
  theme_bw() + 
  scale_x_continuous(labels = scales::comma) + 
  scale_y_continuous(labels = scales::percent) + 
  labs(x = "Sample Size",
       y = "Typical Treatment Effect Capture Rate", 
       title = "B: Typical Treatment Effect Capture Rate As Sample Size Varies") +
  theme(text = element_text(family = font))


# simulate cis
ci_pars <- subset(gg_data, N == 1000)

n_sims <- 100
id <- factor(1:n_sims)
re <- rnorm(n_sims, mean = 0, sd = s_psi)
pt <- rnorm(n_sims, mean = 0, sd = ci_pars$se) + re
lwr <- pt - 1.96*ci_pars$se
upr <- pt + 1.96*ci_pars$se
capture <- ifelse(lwr < 0 & upr > 0, "Captures Typical Effect", "Does Not Capture Typical Effect")
ci_data <- data.frame(id, pt, lwr, upr, capture)

sum(capture == "Does Not Capture Typical Effect")
 (1 - ci_pars$cov)*n_sims

gg3 <-  ggplot(ci_data, aes(x = pt, y = id, xmin = lwr, xmax = upr, linetype = capture, shape = capture, color = capture)) + 
  scale_shape_manual(values = c(21, 19)) +  
  scale_linetype_manual(values = c("solid", "solid")) + 
  scale_color_manual(values = c("grey50", "black")) + 
  #geom_vline(xintercept = 0) + 
  geom_errorbarh(height = 0) + 
  geom_point(fill = "white") + 
  theme_bw() + 
  scale_x_continuous(breaks = 0, labels = c("Typical Treatment Effect")) +
  theme(text = element_text(family = font)) + 
  theme(legend.position = "none") + 
  labs(x = "Treatment Effect", 
       title = "C: Single-Topic Study Examples") + 
  theme(axis.title.y = element_blank(), 
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())

lwr <- pt - 1.96*ci_pars$se/ci_pars$defl
upr <- pt + 1.96*ci_pars$se/ci_pars$defl
capture <- ifelse(lwr < 0 & upr > 0, "Captures Typical Effect", "Does Not Capture Typical Effect")
ci_data <- data.frame(id, pt, lwr, upr, capture)


gg4 <- ggplot(ci_data, aes(x = pt, y = id, xmin = lwr, xmax = upr, linetype = capture, shape = capture, color = capture)) + 
  scale_shape_manual(values = c(21, 19)) +  
  scale_linetype_manual(values = c("solid", "solid")) + 
  scale_color_manual(values = c("grey50", "black")) + 
  #geom_vline(xintercept = 0) + 
  geom_errorbarh(height = 0) + 
  geom_point(fill = "white") + 
  theme_bw() + 
  scale_x_continuous(breaks = 0, labels = c("Typical Treatment Effect")) +
  theme(text = element_text(family = font)) + 
  theme(legend.position = "none")  + 
  labs(x = "Treatment Effect", 
       title = "D: Nominal Rate Examples") + 
  theme(axis.title.y = element_blank(), 
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())
  
((gg1 / gg2) | (gg3 + gg4)) +
  plot_annotation(
    title = "Topic Chosen at Random",
    theme = theme(plot.title = element_text(hjust = 0.5),
                  text = element_text(size = 16, family = font)))



ggsave("output/figure1.png", height = 9, width = 11, scale = 1)

# Figure 2 from paper
# -------------------

# chance of rejecting the typical treatment effect
gg_data$cov_fixed <- 1 - (pnorm(qnorm(0.025)*se + 1*s_psi, sd = se, lower.tail = TRUE) + 
  pnorm(qnorm(0.975)*se + 1*s_psi, sd = se, lower.tail = FALSE))
gg5 <- ggplot(gg_data, aes(x = N, y = cov_fixed)) + 
  geom_line() + 
  theme_bw() + 
  scale_x_continuous(labels = scales::comma) + 
  scale_y_continuous(labels = scales::percent) + 
  labs(x = "Sample Size",
       y = "Typical Treatment Effect Capture Rate", 
       title = "A: Typical Treatment Effect Capture Rate As Sample Size Varies") +
  theme(text = element_text(family = font))

# simulate cis
ci_pars <- subset(gg_data, N == 1000)

n_sims <- 40
id <- factor(1:n_sims)
pt <- rnorm(n_sims, mean = 1*s_psi, sd = ci_pars$se)
lwr <- pt - 1.96*ci_pars$se
upr <- pt + 1.96*ci_pars$se
capture <- ifelse(lwr < 0 & upr > 0, "Captures Typical Effect", "Does Not Capture Typical Effect")
ci_data <- data.frame(id, pt, lwr, upr, capture)


gg6 <-  ggplot(ci_data, aes(x = pt, y = id, xmin = lwr, xmax = upr, linetype = capture, shape = capture, color = capture)) + 
  scale_shape_manual(values = c(21, 19)) +  
  scale_linetype_manual(values = c("solid", "solid")) + 
  scale_color_manual(values = c("grey50", "black")) + 
  #geom_vline(xintercept = 0) + 
  geom_errorbarh(height = 0) + 
  geom_point(fill = "white") + 
  theme_bw() + 
  scale_x_continuous(breaks = 0, labels = c("Typical Treatment Effect")) +
  theme(text = element_text(family = font)) + 
  theme(legend.position = "none") + 
  labs(x = "Treatment Effect", 
       title = "B: Single-Topic Study Examples") + 
  theme(axis.title.y = element_blank(), 
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())

gg5 + gg6  + plot_annotation(
  title = "Topic Chosen to Create a Large Treatment Effect",
  theme = theme(plot.title = element_text(hjust = 0.5),
                text = element_text(size = 16, family = font))) + 
  plot_layout(widths = c(2, 1))

ggsave("output/figure2.png", height = 5, width = 11, scale = 1)




